library(sandwich)

## Flags ISIC (version 3.1) codes that belong to manufacturing, i.e. whose
## two-digit division lies between 15 and 37 inclusive.
##
## isic: one ISIC code or a vector of codes (numeric, character or factor).
##       Three-character codes are assumed to have lost a leading zero and
##       are padded back, e.g. 111 -> "0111".
## Returns a numeric vector of the same length as `isic`: 1 for
## manufacturing, 0 otherwise. Missing codes give 0. Codes whose first two
## characters are not a number also give 0 (as.numeric() warns about the
## coercion).
Ismanuf <- function(isic) {
  code <- as.character(isic)
  ## missing codes are recoded to "0", which lies outside 15-37
  code[is.na(isic)] <- "0"
  short <- nchar(code) == 3
  code[short] <- paste0("0", code[short])
  division <- as.numeric(substring(code, 1, 2))
  ## elementwise comparison so whole columns can be classified in one call
  as.numeric(!is.na(division) & division >= 15 & division <= 37)
}

## Flags ISIC (version 3.1) codes that belong to agriculture or mining,
## i.e. whose two-digit division lies between 0 and 14 inclusive.
##
## isic: one ISIC code or a vector of codes (numeric, character or factor).
##       Three-character codes are assumed to have lost a leading zero and
##       are padded back, e.g. 111 -> "0111".
## Returns a numeric vector of the same length as `isic`: 1 for
## agriculture/mining, 0 otherwise. Missing codes give 0. Codes whose first
## two characters are not a number also give 0 (as.numeric() warns about
## the coercion).
Isagric <- function(isic) {
  missing_code <- is.na(isic)
  code <- as.character(isic)
  code[missing_code] <- "0"
  short <- nchar(code) == 3
  code[short] <- paste0("0", code[short])
  division <- as.numeric(substring(code, 1, 2))
  ## The placeholder "0" used for missing codes falls inside the 0-14
  ## range, so missing codes are excluded explicitly; otherwise every firm
  ## without an ISIC code would be counted as agriculture/mining.
  in_range <- !is.na(division) & division >= 0 & division <= 14
  as.numeric(!missing_code & in_range)
}


# Cluster-robust covariance matrix for a fitted model.
#
# model:   fitted model object; it must work with model.matrix(), estfun()
#          and sandwich(), and carry a `rank` element.
# cluster: vector of cluster identifiers, one per row of the model matrix.
# Returns the sandwich covariance estimate scaled by a finite-sample
# correction; the meat is built from the estimating functions summed within
# each cluster. Stops when `cluster` and the model matrix differ in length,
# and warns when there are fewer than 50 distinct clusters.
cluster_se <- function(model, cluster) {
  n_model_rows <- nrow(model.matrix(model))
  n_obs <- length(cluster)
  if (n_model_rows != n_obs) {
    stop("check your data: cluster variable has different N than model - you may have observations with missing data")
  }

  n_clusters <- length(unique(cluster))
  n_coef <- model$rank
  if (n_clusters < 50) {
    warning("Fewer than 50 clusters, variances may be unreliable (could try block bootstrap instead).")
  }

  # (M / (M - 1)) * ((N - 1) / (N - K))
  correction <- (n_clusters / (n_clusters - 1)) * ((n_obs - 1) / (n_obs - n_coef))

  # one row per cluster, one column per coefficient
  sum_within_cluster <- function(score) tapply(score, cluster, sum)
  cluster_scores <- apply(estfun(model), 2, sum_within_cluster)

  clustered_cov <- correction *
    sandwich(model, meat. = crossprod(cluster_scores) / n_obs)
  clustered_cov
}


## Extracts the two-digit division from a single ISIC code.
##
## isic: one ISIC code (numeric or character). A three-character code is
##       padded with a leading zero first, e.g. 111 -> "0111" -> "01".
## Returns the first two characters of the code as a string, or NA when the
## code is missing.
getISIC2 <- function(isic) {
  if (!is.na(isic)) {
    code <- as.character(isic)
    needs_padding <- nchar(code) == 3
    if (needs_padding) {
      code <- paste0("0", code)
    }
    substring(code, 1, 2)
  } else {
    NA
  }
}

## Flags whether a firm's largest export record is an agricultural product,
## i.e. its HS chapter (first two digits of hs10) is between 01 and 24.
##
## proccode: a single firm ID, matched against procomer$procomerID. The
##           table `procomer` is not an argument; it is looked up outside
##           the function and must have columns procomerID, value and hs10.
## Returns 1 if the chapter is in 01-24, 0 if not, and NA when the ID is
## missing or empty or when the firm has no complete rows in `procomer`.
IsAgrExporter <- function(proccode) {
  proccode <- as.character(proccode)

  ## a missing or empty ID cannot be looked up
  if (is.na(proccode) || proccode == "") {
    return(NA)
  }

  firm <- procomer[procomer$procomerID == proccode, ]
  ## drops every row that has an NA in any column
  firm <- na.omit(firm)
  ## without this guard max() on an empty column warns and the nchar()
  ## test below fails on a missing hs10 value
  if (nrow(firm) == 0) {
    return(NA)
  }

  ## first row holding the largest value
  top <- which(firm$value == max(firm$value))[1]
  hs <- as.character(firm[top, "hs10"])
  hs <- gsub("^p", "", hs)
  ## a nine-character code is taken to have lost its leading zero
  if (nchar(hs) == 9) {
    hs <- paste0("0", hs)
  }
  hs2 <- substring(hs, 1, 2)
  agric <- sprintf("%02d", 1:24)
  if (hs2 %in% agric) 1 else 0
}


## Export totals by hs10 code for one firm, over the firm's most recent year
## of data and the five years before it.
##
## procomerID: a single firm ID, matched against procomer$procomerID. The
##             table `procomer` is looked up outside the function and must
##             have columns procomerID, year, hs10 and value.
## Returns a data.frame with one row per hs10 code and a column `sum`
## holding the total of `value` (missing values ignored).
getHS_10 <- function(procomerID) {
  id <- as.character(procomerID)
  sub.proc <- procomer[which(procomer$procomerID == id), ]
  ## Convert year once and use the numeric version for both the maximum and
  ## the window test. Comparing the raw column with >= yields NA when year
  ## is stored as a factor, which silently emptied the result.
  ## NOTE(review): assumes year holds plain year numbers -- confirm.
  years <- as.numeric(as.character(sub.proc$year))
  year.recent <- max(years)
  sub.i <- sub.proc[which(years >= (year.recent - 5)), ]

  by_hs10 <- group_by(sub.i, hs10)
  hs10sum <- as.data.frame(summarise(by_hs10,
                                     sum = sum(value, na.rm = TRUE)))
  hs10sum
}



## Export totals by hs6 group for one firm, using only the firm's most
## recent year of data.
##
## procomerID: a single firm ID, matched against procomer$procomerID. The
##             table `procomer` is looked up outside the function.
## Returns a data.frame with one row per hs6 value and a column `sum`
## holding the total of `value` (missing values ignored).
## NOTE(review): getHS_6 is defined a second time further down this file
## with the same body; that later definition is the one in effect after the
## file is sourced.
getHS_6 <- function(procomerID) {
  firm_id <- as.character(procomerID)
  firm_rows <- procomer[which(procomer$procomerID == firm_id), ]
  latest_year <- max(as.numeric(as.character(firm_rows$year)))

  in_latest_year <- which(procomer$procomerID == firm_id &
                            procomer$year == latest_year)
  latest <- procomer[in_latest_year, ]

  ## hs6 is the first six characters of hs10
  latest$hs6 <- substring(latest$hs10, 1, 6)
  as.data.frame(
    summarise(group_by(latest, hs6), sum = sum(value, na.rm = TRUE))
  )
}

## Top-level assignment: creates a global `procomerID` whenever this file is
## sourced. NOTE(review): looks like a leftover value from interactive
## testing -- the functions in this file take `procomerID` as a parameter, so
## they do not read this global. Confirm no other script relies on it before
## removing.
procomerID <- "E2933"
## Export totals by hs6 group (first six characters of hs10) for one firm,
## restricted to the firm's most recent year of data.
##
## procomerID: a single firm ID, matched against procomer$procomerID. The
##             table `procomer` is looked up outside the function.
## Returns a data.frame with one row per hs6 value and a column `sum`
## holding the total of `value` (missing values ignored).
## NOTE(review): this repeats the getHS_6 definition that appears earlier in
## this file and replaces it when the file is sourced.
getHS_6 <- function(procomerID) {
  id_chr <- as.character(procomerID)
  same_firm <- procomer$procomerID == id_chr

  ## most recent year among this firm's rows
  firm_years <- procomer[which(same_firm), ]$year
  newest <- max(as.numeric(as.character(firm_years)))

  recent <- procomer[which(same_firm & procomer$year == newest), ]
  recent$hs6 <- substring(recent$hs10, 1, 6)

  grouped <- group_by(recent, hs6)
  totals <- summarise(grouped, sum = sum(value, na.rm = TRUE))
  return(as.data.frame(totals))
}

## One-row summary of a firm's export records.
##
## procomerID: a single firm ID, matched against procomer$procomerID. The
##             table `procomer` is looked up outside the function and must
##             have columns procomerID, year, destination, hs10 and value.
## Returns a data.frame with columns
##   procomerID    - the ID that was looked up
##   year_recent   - latest year found for the firm (NA if it has no rows)
##   totalv_recent - sum of `value` in that year, missing values ignored
##   ndest         - number of distinct destinations in that year
##   nproduct      - number of distinct hs10 codes in that year
##   nyears        - number of distinct years over all of the firm's rows
getInfo <- function(procomerID) {
  id <- as.character(procomerID)
  sub.proc <- procomer[which(procomer$procomerID == id), ]

  ## max() of an empty vector warns and returns -Inf, so a firm without any
  ## rows gets NA here instead
  years <- as.numeric(as.character(sub.proc$year))
  year.recent <- if (length(years) > 0) max(years) else NA_real_

  ## rows of this firm that fall in its most recent year
  sub.i <- sub.proc[which(sub.proc$year == year.recent), ]

  data.frame(procomerID = id,
             year_recent = year.recent,
             totalv_recent = sum(sub.i$value, na.rm = TRUE),
             ndest = length(unique(sub.i$destination)),
             nproduct = length(unique(sub.i$hs10)),
             nyears = length(unique(sub.proc$year)))
}
